library(fixest)
library(readr)
library(jiebaR)
library(readr)
library(quanteda)
library(stm)
library(keyATM)
library(readxl)
library(stargazer)
library(haven)
library(mediation)
library(ggplot2)
# Load the saved workspace. The code below uses `PoliticalQA`, `NPoliticalQA`,
# `newstopwords` and `Data` without creating them, so they are presumably
# restored from this file -- TODO confirm.
load("Data.RData")


# Figure 1: Structural Topic Models
# Create the jiebaR segmentation worker and register each custom term in its
# user dictionary with the tag "n" (presumably so that segment() keeps these
# multi-character expressions as single tokens -- TODO confirm).
wk <- worker()
custom_terms <- c(
  "十二五", "十三五", "十四五", "一带一路", "贸易战", "政策风险",
  "政治风险", "密切关注", "保持关注", "不确定", "不明确", "政策资源",
  "旅游资源", "文化资源", "经济形势", "新技术", "新领域", "证监会",
  "产业政策", "优惠政策", "5g", "云计算", "大湾区", "政府项目"
)
for (term in custom_terms) {
  new_user_word(wk, term, "n")
}

# ---- Political Q&As: segmentation, document-feature matrix, 10-topic STM ----

# Segment every political Q&A text with the jiebaR worker `wk`.
# simplify = FALSE forces sapply() to return a list with one character vector
# per document; without it the result collapses to a matrix whenever all
# documents happen to produce the same number of tokens (e.g. a single
# document), which is not the per-document list as.tokens() is meant to get.
PoliQAsegwords <- as.tokens(
  sapply(as.character(PoliticalQA$Text), segment, wk, simplify = FALSE)
)

# Drop the generic Chinese stopword list ("misc" source of the stopwords pkg).
PoliQAsegwords <- tokens_remove(
  PoliQAsegwords,
  pattern = stopwords::stopwords("zh", source = "misc")
)

# Remove punctuation, symbols, numbers, URLs and separators at the tokens
# stage. These options belong to tokens(); handing them to dfm() on a tokens
# object is deprecated in quanteda 3 and not applied in later releases, which
# would silently leave those tokens in the vocabulary.
PoliQAsegwords <- tokens(
  PoliQAsegwords,
  remove_punct = TRUE,
  remove_symbols = TRUE,
  remove_numbers = TRUE,
  remove_url = TRUE,
  remove_separators = TRUE
)

# Build the document-feature matrix, then drop the project-specific stopwords
# in `newstopwords` (not defined in this part of the script).
PoliQAsegwords <- dfm(PoliQAsegwords)
PoliQAsegwords <- dfm_remove(PoliQAsegwords, newstopwords)

# Keep only features occurring at least 948 times in total, then convert to
# the documents/vocab/meta list that stm() consumes.
# NOTE(review): the rationale for the 948 cut-off is not visible here.
PoliQADFM <- dfm_trim(PoliQAsegwords, min_termfreq = 948)
PoliQADFM <- convert(PoliQADFM, to = "stm")

# Fit a 10-topic structural topic model with spectral initialisation and at
# most 200 EM iterations.
POstm10topic <- stm(
  documents = PoliQADFM$documents,
  vocab = PoliQADFM$vocab,
  K = 10,
  max.em.its = 200,
  data = PoliQADFM$meta,
  init.type = "Spectral",
  verbose = FALSE
)

# Write the topic plot to an 8 x 6 inch PNG at 300 dpi, labelling each topic
# with 3 words. `n` is a count, so it is passed as a number rather than the
# string '3', and the plot is drawn through the plot() generic.
# NOTE(review): the 'Arial Unicode MS' font must be installed for the Chinese
# labels to render.
png("Political Q&A.png", width = 8 * 300, height = 6 * 300, res = 300)
plot(
  POstm10topic,
  n = 3,
  family = "Arial Unicode MS",
  main = "Topics of Political Q&As"
)
dev.off()

# ---- Non-political Q&As: segmentation, document-feature matrix, 10-topic STM ----

# Segment every non-political Q&A text with the jiebaR worker `wk`.
# simplify = FALSE forces sapply() to return a list with one character vector
# per document; without it the result collapses to a matrix whenever all
# documents happen to produce the same number of tokens (e.g. a single
# document), which is not the per-document list as.tokens() is meant to get.
NPoliQAsegwords <- as.tokens(
  sapply(as.character(NPoliticalQA$Text), segment, wk, simplify = FALSE)
)

# Drop the generic Chinese stopword list ("misc" source of the stopwords pkg).
NPoliQAsegwords <- tokens_remove(
  NPoliQAsegwords,
  pattern = stopwords::stopwords("zh", source = "misc")
)

# Remove punctuation, symbols, numbers, URLs and separators at the tokens
# stage. These options belong to tokens(); handing them to dfm() on a tokens
# object is deprecated in quanteda 3 and not applied in later releases, which
# would silently leave those tokens in the vocabulary.
NPoliQAsegwords <- tokens(
  NPoliQAsegwords,
  remove_punct = TRUE,
  remove_symbols = TRUE,
  remove_numbers = TRUE,
  remove_url = TRUE,
  remove_separators = TRUE
)

# Build the document-feature matrix, then drop the project-specific stopwords
# in `newstopwords` (not defined in this part of the script).
NPoliQAsegwords <- dfm(NPoliQAsegwords)
NPoliQAsegwords <- dfm_remove(NPoliQAsegwords, newstopwords)

# Keep only features occurring at least 6442 times in total, then convert to
# the documents/vocab/meta list that stm() consumes.
# NOTE(review): the rationale for the 6442 cut-off is not visible here.
NPoliQADFM <- dfm_trim(NPoliQAsegwords, min_termfreq = 6442)
NPoliQADFM <- convert(NPoliQADFM, to = "stm")

# Fit a 10-topic structural topic model with spectral initialisation and at
# most 200 EM iterations.
NPOstm10topic <- stm(
  documents = NPoliQADFM$documents,
  vocab = NPoliQADFM$vocab,
  K = 10,
  max.em.its = 200,
  data = NPoliQADFM$meta,
  init.type = "Spectral",
  verbose = FALSE
)

# Write the topic plot to an 8 x 6 inch PNG at 300 dpi, labelling each topic
# with 3 words. `n` is a count, so it is passed as a number rather than the
# string '3', and the plot is drawn through the plot() generic.
# NOTE(review): the 'Arial Unicode MS' font must be installed for the Chinese
# labels to render.
png("NonPolitical Q&A.png", width = 8 * 300, height = 6 * 300, res = 300)
plot(
  NPOstm10topic,
  n = 3,
  family = "Arial Unicode MS",
  main = "Topics of NonPolitical Q&As"
)
dev.off()

# Figure 2: number of rows in `Data` with positive Expenditure, per year.
# Each qualifying row contributes a count of 1; the counts are summed by Year
# and the result is stored with columns `Year` and `Firms`.
firm_years <- subset(Data, Expenditure > 0)
TPAAll <- aggregate(
  x = list(Firms = rep(1, nrow(firm_years))),
  by = list(Year = firm_years$Year),
  FUN = sum,
  na.rm = TRUE
)

# Bar chart of the yearly counts.
ggplot(TPAAll, aes(x = Year, y = Firms)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  labs(x = "Year", y = "N of Participated Firms")


# Figure 3: Effect of the PRI on Expenditure
# Regress Expenditure on PRI interacted with Year (2015 as the reference
# year) plus PRI and the listed covariates, with Firm and Year fixed effects,
# then plot the interaction coefficients.
res_twfe <- feols(
  Expenditure ~ i(Year, PRI, ref = c(2015)) + PRI + Sale + TotalAsset +
    TaxRate + ROE + lgAge + PoliConn | Firm + Year,
  data = Data
)
iplot(res_twfe, main = "Effect of PRI on Expenditure")

